---
title: Title
keywords: fastai
sidebar: home_sidebar
nb_path: 'nbs\90_tutorial.ipynb'
---
{% raw %}
{% endraw %}

Open In Colab

{% raw %}
from fastai.torch_core import set_seed
from fastai.vision.all import *
from fastai.medical.imaging import *
from torchvision.utils import save_image

from fmi.data import *
from fmi.core import *
from fmi.windows import *
{% endraw %} {% raw %}
# Root folder of the Melanoma dataset (local Windows path).
source = 'D:/Datasets/Melanoma/'
# Directory listing of the dataset root; not referenced again in the cells shown here.
files = os.listdir(source)
# Collect the DICOM files located under the 'train' folder of the dataset.
mel_items = get_dicom_files(source, folders='train')
{% endraw %} {% raw %}
# Root folder of the OSIC pulmonary fibrosis progression dataset.
source2 = 'D:/Datasets/osic-pulmonary-fibrosis-progression/'
# Collect the DICOM files located under the 'train' folder.
os_items = get_dicom_files(source2, folders='train')
# NOTE: source2 already ends with '/', so this f-string yields a '//' in the path.
df_unique = pd.read_csv(f'{source2}/uniq.csv')
# Folder of a single patient, used as a small test set in later cells.
test1 = 'D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00009637202177434476278'
test_files = get_dicom_files(test1)
{% endraw %}

Splits

{% raw %}
def random_(items, value=500):
    """Return `value` randomly chosen entries of `items` (sampled with replacement).

    Indices are drawn uniformly from the valid range of `items`, so the
    function works for collections of any size. The previous version drew
    indices from a fixed 0..10000 range, which raised IndexError on smaller
    collections and never reached items past index 10000 on larger ones.

    Args:
        items: A sized collection that supports indexing with a list of
            integer positions (e.g. a fastcore `L` or a NumPy array).
        value: Number of entries to draw.

    Returns:
        The result of indexing `items` with the list of drawn positions.

    Raises:
        IndexError: If `items` is empty and `value` is greater than zero.
    """
    n_items = len(items)
    if value > 0 and n_items == 0:
        raise IndexError('cannot sample from an empty collection')
    # randrange excludes its upper bound, so every index is valid for `items`.
    indices = [random.randrange(n_items) for _ in range(value)]
    return items[indices]
{% endraw %} {% raw %}
# Draw 100 randomly indexed entries from the OSIC file list and display them.
random_(os_items, 100)
(#100) [Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00078637202199415319443/949.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00042637202184406822975/363.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00014637202177757139317/11.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00009637202177434476278/231.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00038637202182690843176/242.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00078637202199415319443/889.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00078637202199415319443/892.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00082637202201836229724/186.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00104637202208063407045/474.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00042637202184406822975/49.dcm')...]
{% endraw %} {% raw %}
# Draw 100 Melanoma files and pass them to check_duplicate with a fixed seed.
# The recorded output shows the duplicate set followed by two index lists (80 and 20 items).
check_duplicate(random_(mel_items, 100), seed=77)
Duplicate: set()
((#80) [17,80,74,39,54,27,23,7,69,44...],
 (#20) [55,4,90,14,47,22,64,13,96,65...])
{% endraw %}

DataBlock

{% raw %}
# Fix the random seed -- presumably so the RandomSplitter split below is reproducible.
set_seed(7)
# Build the image path for a dataframe row: <source2>/pct_window/<Patient>/<file>.
# The `file` column holds strings such as "['22.dcm']", so [2:-2] strips the
# leading "['" and the trailing "']".
get_x1 = lambda x:f'{source2}/pct_window/{x.Patient}/{str(x.file)[2:-2]}'
# The label is read from the 'SmokingStatus' column.
get_y1 = ColReader('SmokingStatus')

# Inputs are images created through PEWindow; targets are categories.
blocks = (ImageBlock(cls=PEWindow), CategoryBlock)
# Items are resized to 512 per item, then scaled by IntToFloatTensor(div=255) per batch.
db = DataBlock(blocks=blocks,
                   get_x=get_x1,
                   splitter=RandomSplitter(),
                   item_tfms=Resize(512),
                   get_y=get_y1,
                   batch_tfms=[IntToFloatTensor(div=255)])
{% endraw %} {% raw %}
# Print a step-by-step trace of how the DataBlock builds one sample and one batch from df_unique.
db.summary(df_unique)
Setting-up type transforms pipelines
Collecting items from      Unnamed: 0                    Patient  Weeks   FVC    Percent  Age   Sex  \
0             0  ID00007637202177411956430     -4  2315  58.253649   79  Male   
1             1  ID00009637202177434476278      8  3660  85.282878   69  Male   
2             2  ID00010637202177584971671      0  3523  94.724672   60  Male   
3             3  ID00011637202177653955184      6  3326  85.987590   72  Male   
4             4  ID00012637202177665765362     33  3418  93.726006   65  Male   
..          ...                        ...    ...   ...        ...  ...   ...   
171         171  ID00419637202311204720264      6  3020  70.186855   73  Male   
172         172  ID00421637202311550012437     15  2739  82.045291   68  Male   
173         173  ID00422637202311677017371      6  1930  76.672493   73  Male   
174         174  ID00423637202312137826377     17  3294  79.258903   72  Male   
175         175  ID00426637202313170790466      0  2925  71.824968   73  Male   

    SmokingStatus         file  
0       Ex-smoker   ['10.dcm']  
1       Ex-smoker  ['100.dcm']  
2       Ex-smoker   ['51.dcm']  
3       Ex-smoker    ['3.dcm']  
4    Never smoked   ['22.dcm']  
..            ...          ...  
171     Ex-smoker   ['15.dcm']  
172     Ex-smoker   ['20.dcm']  
173     Ex-smoker  ['462.dcm']  
174     Ex-smoker  ['189.dcm']  
175  Never smoked  ['209.dcm']  

[176 rows x 9 columns]
Found 176 items
2 datasets of sizes 141,35
Setting up Pipeline: <lambda> -> PEWindow.create
Setting up Pipeline: ColReader -- {'cols': 'SmokingStatus', 'pref': '', 'suff': '', 'label_delim': None} -> Categorize -- {'vocab': None, 'sort': True, 'add_na': False}

Building one sample
  Pipeline: <lambda> -> PEWindow.create
    starting from
      Unnamed: 0                              35
Patient          ID00089637202204675567570
Weeks                                    7
FVC                                   2478
Percent                            57.9676
Age                                     63
Sex                                   Male
SmokingStatus                 Never smoked
file                            ['22.dcm']
Name: 35, dtype: object
    applying <lambda> gives
      D:/Datasets/osic-pulmonary-fibrosis-progression//pct_window/ID00089637202204675567570/22.dcm
    applying PEWindow.create gives
      PEWindow mode=L size=768x768
  Pipeline: ColReader -- {'cols': 'SmokingStatus', 'pref': '', 'suff': '', 'label_delim': None} -> Categorize -- {'vocab': None, 'sort': True, 'add_na': False}
    starting from
      Unnamed: 0                              35
Patient          ID00089637202204675567570
Weeks                                    7
FVC                                   2478
Percent                            57.9676
Age                                     63
Sex                                   Male
SmokingStatus                 Never smoked
file                            ['22.dcm']
Name: 35, dtype: object
    applying ColReader -- {'cols': 'SmokingStatus', 'pref': '', 'suff': '', 'label_delim': None} gives
      Never smoked
    applying Categorize -- {'vocab': None, 'sort': True, 'add_na': False} gives
      TensorCategory(2, dtype=torch.int32)

Final sample: (PEWindow mode=L size=768x768, TensorCategory(2, dtype=torch.int32))


Setting up after_item: Pipeline: Resize -- {'size': (512, 512), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (2, 0), 'p': 1.0} -> ToTensor
Setting up before_batch: Pipeline: 
Setting up after_batch: Pipeline: IntToFloatTensor -- {'div': 255, 'div_mask': 1}

Building one batch
Applying item_tfms to the first sample:
  Pipeline: Resize -- {'size': (512, 512), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (2, 0), 'p': 1.0} -> ToTensor
    starting from
      (PEWindow mode=L size=768x768, TensorCategory(2, dtype=torch.int32))
    applying Resize -- {'size': (512, 512), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (2, 0), 'p': 1.0} gives
      (PEWindow mode=L size=512x512, TensorCategory(2, dtype=torch.int32))
    applying ToTensor gives
      (TensorDicom of size 1x512x512, TensorCategory(2, dtype=torch.int32))

Adding the next 3 samples

No before_batch transform to apply

Collating items in a batch

Applying batch_tfms to the batch built
  Pipeline: IntToFloatTensor -- {'div': 255, 'div_mask': 1}
    starting from
      (TensorDicom of size 4x1x512x512, TensorCategory([2, 2, 1, 2], device='cuda:0', dtype=torch.int32))
    applying IntToFloatTensor -- {'div': 255, 'div_mask': 1} gives
      (TensorDicom of size 4x1x512x512, TensorCategory([2, 2, 1, 2], device='cuda:0', dtype=torch.int32))
{% endraw %} {% raw %}
# Display the batch transforms configured on the DataBlock.
db.batch_tfms
(#1) [IntToFloatTensor -- {'div': 255, 'div_mask': 1}:
encodes: (TensorImage,object) -> encodes
(TensorMask,object) -> encodes
decodes: (TensorImage,object) -> decodes
]
{% endraw %} {% raw %}
# Build the dataloaders from the dataframe (batch size 16, n_workers=0).
dls = db.dataloaders(df_unique, bs=16, n_workers=0)
# Display one batch in a 20x20 figure.
dls.show_batch(figsize=(20,20))
{% endraw %}

Sort

{% raw %}
# Load the DICOM files of a single patient folder inside the train directory.
s = 'D:/Datasets/osic-pulmonary-fibrosis-progression/train/'
sort_items = get_dicom_files(s, folders='ID00007637202177411956430')
# Display the list; the recorded output shows lexicographic order (1.dcm, 10.dcm, 11.dcm, ...).
sort_items
(#30) [Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/1.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/10.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/11.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/12.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/13.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/14.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/15.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/16.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/17.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/18.dcm')...]
{% endraw %}

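View the images in the folder. Note that they are not in slice order: the file names sort lexicographically (1.dcm, 10.dcm, 11.dcm, ...) rather than numerically.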

{% raw %}
%%time
# Read each DICOM file, wrap its pixel array in a TensorDicom, and show
# all of the resulting images in a grid with three rows.
imgs = [TensorDicom(dcmread(dcm_path).pixel_array) for dcm_path in sort_items]
show_images(imgs, nrows=3)
Wall time: 544 ms
{% endraw %} {% raw %}
%%time
# Show the same files through instance_show (three rows).
# NOTE(review): presumably this orders the images by DICOM instance number -- confirm against the fmi docs.
instance_show(sort_items, nrows=3)
Wall time: 564 ms
{% endraw %}

Mask and Save

{% raw %}
# DICOM files of one patient folder.
m_items = get_dicom_files('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430')
# NOTE: this rebinds `source`, which earlier pointed at the Melanoma dataset.
source = 'D:/Datasets/osic-pulmonary-fibrosis-progression/train/'
# Build a dataframe from the DICOM files; the recorded output shows metadata columns
# plus image statistics (img_min, img_max, img_mean, img_std, img_pct_window).
dicom_dataframe = pd.DataFrame.from_dicoms(m_items)
# Preview the first two rows.
dicom_dataframe[:2]
SpecificCharacterSet ImageType SOPInstanceUID Modality Manufacturer ManufacturerModelName PatientName PatientID PatientSex DeidentificationMethod ... ImageOrientationPatient3 ImageOrientationPatient4 ImageOrientationPatient5 MultiPixelSpacing PixelSpacing1 img_min img_max img_mean img_std img_pct_window
0 ISO_IR 100 ORIGINAL 2.25.12297650151329871895440507938349160734 CT GE MEDICAL SYSTEMS LightSpeed VCT (I, D, 0, 0, 0, 0, 7, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 1, 1, 9, 5, 6, 4, 3, 0) ID00007637202177411956430 Table; ... 0.0 1.0 0.0 1 0.652344 -2000 2842 -1.454884 1137.488858 0.058094
1 ISO_IR 100 ORIGINAL 2.25.37611372879908126511187998276199853341 CT GE MEDICAL SYSTEMS LightSpeed VCT (I, D, 0, 0, 0, 0, 7, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 1, 1, 9, 5, 6, 4, 3, 0) ID00007637202177411956430 Table; ... 0.0 1.0 0.0 1 0.652344 -2000 2918 19.038597 1138.876560 0.068130

2 rows × 67 columns

{% endraw %} {% raw %}
# Run mask_and_save on the first 20 rows with sigma=0.2, displaying the result (show=True).
# save=False, so presumably nothing is written to disk -- confirm against the fmi docs.
mask_and_save(dicom_dataframe[:20], show=True, source=source, sigma=0.2, save=False)
{% endraw %} {% raw %}
# Run mask_and_save on the first 20 rows with sigma=0.1 (show=True, save=False) for comparison across sigma values.
mask_and_save(dicom_dataframe[:20], show=True, source=source, sigma=0.1, save=False)
{% endraw %} {% raw %}
# Run mask_and_save on the first 20 rows with sigma=0.01 (show=True, save=False) for comparison across sigma values.
mask_and_save(dicom_dataframe[:20], show=True, source=source, sigma=0.01, save=False)
{% endraw %}

pct_window, mean, std

{% raw %}
# Keep the identifying columns and image statistics, then rank the rows by
# img_pct_window from highest to lowest with a fresh 0-based index.
pct_columns = ['PatientID', 'InstanceNumber', 'img_pct_window', 'img_mean', 'img_std']
pct_sorted = dicom_dataframe[pct_columns].sort_values(by=['img_pct_window'], ascending=False)
pct = pct_sorted.reset_index(drop=True)
# Preview the five rows with the largest img_pct_window.
pct[:5]
PatientID InstanceNumber img_pct_window img_mean img_std
0 ID00007637202177411956430 20 0.077923 109.478535 1174.014672
1 ID00007637202177411956430 21 0.075516 127.095284 1182.195403
2 ID00007637202177411956430 29 0.075508 210.377762 1217.328780
3 ID00007637202177411956430 18 0.075157 97.442841 1172.540524
4 ID00007637202177411956430 28 0.075096 212.950813 1219.356338
{% endraw %} {% raw %}
# Pass the first 30 rows of the img_pct_window ranking to get_dicom_image, keyed on 'img_pct_window'.
get_dicom_image(pct[:30], 'img_pct_window', source=source) 
{% endraw %} {% raw %}
# Keep the identifying columns and image statistics, then rank the rows by
# img_mean from lowest to highest with a fresh 0-based index.
mean_columns = ['PatientID', 'InstanceNumber', 'img_pct_window', 'img_mean', 'img_std']
mean_sorted = dicom_dataframe[mean_columns].sort_values(by=['img_mean'], ascending=True)
mean = mean_sorted.reset_index(drop=True)
# Preview the five rows with the smallest img_mean.
mean[:5]
PatientID InstanceNumber img_pct_window img_mean img_std
0 ID00007637202177411956430 1 0.058094 -1.454884 1137.488858
1 ID00007637202177411956430 10 0.068130 19.038597 1138.876560
2 ID00007637202177411956430 9 0.064880 25.004814 1146.082668
3 ID00007637202177411956430 11 0.064548 27.237549 1146.816206
4 ID00007637202177411956430 8 0.062935 29.426968 1150.758986
{% endraw %} {% raw %}
# Pass the first 30 rows of the img_mean ranking to get_dicom_image, keyed on 'img_mean'.
get_dicom_image(mean[:30], 'img_mean', source=source) 
{% endraw %} {% raw %}
# Keep the identifying columns and image statistics, then rank the rows by
# img_std from lowest to highest with a fresh 0-based index.
std_columns = ['PatientID', 'InstanceNumber', 'img_pct_window', 'img_mean', 'img_std']
std_sorted = dicom_dataframe[std_columns].sort_values(by=['img_std'], ascending=True)
std = std_sorted.reset_index(drop=True)
# Preview the five rows with the smallest img_std.
std[:5]
PatientID InstanceNumber img_pct_window img_mean img_std
0 ID00007637202177411956430 1 0.058094 -1.454884 1137.488858
1 ID00007637202177411956430 10 0.068130 19.038597 1138.876560
2 ID00007637202177411956430 12 0.069099 31.026043 1142.850763
3 ID00007637202177411956430 9 0.064880 25.004814 1146.082668
4 ID00007637202177411956430 11 0.064548 27.237549 1146.816206
{% endraw %} {% raw %}
# Pass the first 30 rows of the img_std ranking to get_dicom_image, keyed on 'img_std'.
get_dicom_image(std[:30], 'img_std', source=source) 
{% endraw %}

Metadata dict

{% raw %}
# Rebuild the dataframe for the single-patient test files with from_dicoms2,
# passing the mediastinum window.
dicom_dataframe = pd.DataFrame.from_dicoms2(test_files, windows=dicom_windows.mediastinum)
# Preview the first two rows (the recorded output includes img_pct_window and file_path columns).
dicom_dataframe[:2]
SpecificCharacterSet ImageType SOPInstanceUID Modality Manufacturer ManufacturerModelName PatientName PatientID PatientSex DeidentificationMethod ... WindowWidth RescaleIntercept RescaleSlope RescaleType img_min img_max img_mean img_std img_pct_window file_path
0 [ISO 2022 IR 100, ISO 2022 IR 100] [DERIVED, SECONDARY, AXIAL, CT_SOM5 SPI] 1.2.276.0.7230010.3.1.4.0.37492.1591423150.182195 CT SIEMENS OsiriX (I, D, 0, 0, 0, 0, 9, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 3, 4, 4, 7, 6, 2, 7, 8) ID00009637202177434476278 Table; ... 1500.0 -1024.0 1.0 HU 0 4573 571.728331 568.734741 0.216307 ID00009637202177434476278/1.dcm
1 [ISO 2022 IR 100, ISO 2022 IR 100] [DERIVED, SECONDARY, AXIAL, CT_SOM5 SPI] 1.2.276.0.7230010.3.1.4.0.37492.1591423150.182204 CT SIEMENS OsiriX (I, D, 0, 0, 0, 0, 9, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 3, 4, 4, 7, 6, 2, 7, 8) ID00009637202177434476278 Table; ... 1500.0 -1024.0 1.0 HU 0 4346 591.064126 568.077568 0.217928 ID00009637202177434476278/10.dcm

2 rows × 52 columns

{% endraw %} {% raw %}
# Rebuild the dataframe for the same test files, this time passing the lungs window.
# In the recorded outputs img_pct_window changes with the window while img_mean/img_std do not.
dicom_dataframe = pd.DataFrame.from_dicoms2(test_files, windows=dicom_windows.lungs)
# Preview the first two rows.
dicom_dataframe[:2]
SpecificCharacterSet ImageType SOPInstanceUID Modality Manufacturer ManufacturerModelName PatientName PatientID PatientSex DeidentificationMethod ... WindowWidth RescaleIntercept RescaleSlope RescaleType img_min img_max img_mean img_std img_pct_window file_path
0 [ISO 2022 IR 100, ISO 2022 IR 100] [DERIVED, SECONDARY, AXIAL, CT_SOM5 SPI] 1.2.276.0.7230010.3.1.4.0.37492.1591423150.182195 CT SIEMENS OsiriX (I, D, 0, 0, 0, 0, 9, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 3, 4, 4, 7, 6, 2, 7, 8) ID00009637202177434476278 Table; ... 1500.0 -1024.0 1.0 HU 0 4573 571.728331 568.734741 0.851178 ID00009637202177434476278/1.dcm
1 [ISO 2022 IR 100, ISO 2022 IR 100] [DERIVED, SECONDARY, AXIAL, CT_SOM5 SPI] 1.2.276.0.7230010.3.1.4.0.37492.1591423150.182204 CT SIEMENS OsiriX (I, D, 0, 0, 0, 0, 9, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 3, 4, 4, 7, 6, 2, 7, 8) ID00009637202177434476278 Table; ... 1500.0 -1024.0 1.0 HU 0 4346 591.064126 568.077568 0.843967 ID00009637202177434476278/10.dcm

2 rows × 52 columns

{% endraw %}

Convert to 3 channel

{% raw %}
# Folder passed as the output location to the conversion call in the next cell.
save_dir = 'D:/Datasets/osic-pulmonary-fibrosis-progression/test3c/'
# Pick one file from the single-patient test set.
# NOTE: this rebinds test1, which previously held the patient folder path string.
test1 = test_files[12]
# Display the selected path (11.dcm in the recorded output).
test1
Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00009637202177434476278/11.dcm')
{% endraw %} {% raw %}
# Convert the selected DICOM file using three windows (lungs, mediastinum, pe).
# NOTE(review): presumably each window becomes one image channel and the result is written under save_dir -- confirm against the fmi docs.
dicom_convert_3channel(test1, save_dir, win1=dicom_windows.lungs, win2=dicom_windows.mediastinum, win3=dicom_windows.pe)
{% endraw %}

Test saved image

{% raw %}
# Load 11.jpg from the test3c folder (presumably the file written by the conversion step).
saved_image = PILImage.create('D:/Datasets/osic-pulmonary-fibrosis-progression/test3c/11.jpg')
# Wrap the image in a TensorImage so its shape can be checked and its channels sliced.
saved_ten = TensorImage(saved_image)
# The recorded output is torch.Size([768, 768, 3]), i.e. channels are on the last axis.
saved_ten.shape
torch.Size([768, 768, 3])
{% endraw %} {% raw %}
# Show the full three-channel image next to each of its channels;
# channels sit on the last axis of saved_ten.
channel_views = [saved_ten[:,:,:]] + [saved_ten[:,:,idx] for idx in range(3)]
channel_titles = ['all_channels', 'channel_one', 'channel_two', 'channel_three']
show_images(channel_views, titles=channel_titles)
{% endraw %}